# install.packages("UKgrid")
# install.packages("timetk")
pacman::p_load(tidyverse, lubridate, UKgrid, janitor, timetk)
options(scipen = 999)


# check the current time
time <- Sys.time()
time
## [1] "2022-12-13 21:50:04 EAT"
# check the current date
date <- Sys.Date()
date
## [1] "2022-12-13"
class(date)
## [1] "Date"
class(time)
## [1] "POSIXct" "POSIXt"
time_lt <- as.POSIXlt(time)
class(time_lt)
## [1] "POSIXlt" "POSIXt"
unclass(time)
## [1] 1670957405
class(unclass(time_lt))
## [1] "list"
search()
##  [1] ".GlobalEnv"        "package:timetk"    "package:janitor"  
##  [4] "package:UKgrid"    "package:lubridate" "package:forcats"  
##  [7] "package:stringr"   "package:dplyr"     "package:purrr"    
## [10] "package:readr"     "package:tidyr"     "package:tibble"   
## [13] "package:ggplot2"   "package:tidyverse" "package:stats"    
## [16] "package:graphics"  "package:grDevices" "package:utils"    
## [19] "package:datasets"  "package:methods"   "Autoloads"        
## [22] "package:base"
dates_df <- read_csv("https://raw.githubusercontent.com/PacktPublishing/Hands-On-Time-Series-Analysis-with-R/master/Chapter02/dates_formats.csv")
## Rows: 22 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Japanese_format, US_format, US_long_format, CA_mix_format, SA_mix_f...
## dbl (1): Excel_Numeric_Format
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(dates_df)
## Rows: 22
## Columns: 7
## $ Japanese_format      <chr> "2017/1/20", "2017/1/21", "2017/1/22", "2017/1/23…
## $ US_format            <chr> "1/20/2017", "1/21/2017", "1/22/2017", "1/23/2017…
## $ US_long_format       <chr> "Friday, January 20, 2017", "Saturday, January 21…
## $ CA_mix_format        <chr> "January 20, 2017", "January 21, 2017", "January …
## $ SA_mix_format        <chr> "20 January 2017", "21 January 2017", "22 January…
## $ NZ_format            <chr> "20/01/2017", "21/01/2017", "22/01/2017", "23/01/…
## $ Excel_Numeric_Format <dbl> 42755, 42756, 42757, 42758, 42759, 42760, 42761, …
class(dates_df)
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"
dates_df %>%
  mutate(Japanese_format = parse_date_time(Japanese_format, order = "ymd"))
Japanese_format US_format US_long_format CA_mix_format SA_mix_format NZ_format Excel_Numeric_Format
2017-01-20 1/20/2017 Friday, January 20, 2017 January 20, 2017 20 January 2017 20/01/2017 42755
2017-01-21 1/21/2017 Saturday, January 21, 2017 January 21, 2017 21 January 2017 21/01/2017 42756
2017-01-22 1/22/2017 Sunday, January 22, 2017 January 22, 2017 22 January 2017 22/01/2017 42757
2017-01-23 1/23/2017 Monday, January 23, 2017 January 23, 2017 23 January 2017 23/01/2017 42758
2017-01-24 1/24/2017 Tuesday, January 24, 2017 January 24, 2017 24 January 2017 24/01/2017 42759
2017-01-25 1/25/2017 Wednesday, January 25, 2017 January 25, 2017 25 January 2017 25/01/2017 42760
2017-01-26 1/26/2017 Thursday, January 26, 2017 January 26, 2017 26 January 2017 26/01/2017 42761
2017-01-27 1/27/2017 Friday, January 27, 2017 January 27, 2017 27 January 2017 27/01/2017 42762
2017-01-28 1/28/2017 Saturday, January 28, 2017 January 28, 2017 28 January 2017 28/01/2017 42763
2017-01-29 1/29/2017 Sunday, January 29, 2017 January 29, 2017 29 January 2017 29/01/2017 42764
2017-01-30 1/30/2017 Monday, January 30, 2017 January 30, 2017 30 January 2017 30/01/2017 42765
2017-01-31 1/31/2017 Tuesday, January 31, 2017 January 31, 2017 31 January 2017 31/01/2017 42766
2017-02-01 2/1/2017 Wednesday, February 1, 2017 February 1, 2017 01 February 2017 1/2/2017 42767
2017-02-02 2/2/2017 Thursday, February 2, 2017 February 2, 2017 02 February 2017 2/2/2017 42768
2017-02-03 2/3/2017 Friday, February 3, 2017 February 3, 2017 03 February 2017 3/2/2017 42769
2017-02-04 2/4/2017 Saturday, February 4, 2017 February 4, 2017 04 February 2017 4/2/2017 42770
2017-02-05 2/5/2017 Sunday, February 5, 2017 February 5, 2017 05 February 2017 5/2/2017 42771
2017-02-06 2/6/2017 Monday, February 6, 2017 February 6, 2017 06 February 2017 6/2/2017 42772
2017-02-07 2/7/2017 Tuesday, February 7, 2017 February 7, 2017 07 February 2017 7/2/2017 42773
2017-02-08 2/8/2017 Wednesday, February 8, 2017 February 8, 2017 08 February 2017 8/2/2017 42774
2017-02-09 2/9/2017 Thursday, February 9, 2017 February 9, 2017 09 February 2017 9/2/2017 42775
2017-02-10 2/10/2017 Friday, February 10, 2017 February 10, 2017 10 February 2017 10/2/2017 42776
dates_df %>%
  mutate(
    Japanese_format = ymd(Japanese_format),
    US_format = mdy(US_format),
    US_long_format = mdy(US_long_format),
    SA_mix_format = dmy(SA_mix_format)
  ) %>%
  view()

time_US_str <- "Monday, December 31, 2018 11:59:59 PM"

mdy_hms(time_US_str, tz = "Africa/Nairobi")
## [1] "2018-12-31 23:59:59 EAT"
view(UKgrid)
rm(UKgrid)
## Warning in rm(UKgrid): object 'UKgrid' not found
uk_data <-
  UKgrid %>%
  # lowercase all column names
  clean_names() %>%
  select(timestamp, nd)



year(uk_data$timestamp)[1:5]
## [1] 2005 2005 2005 2005 2005
month(uk_data$timestamp, label = T)[1:5]
## [1] Apr Apr Apr Apr Apr
## 12 Levels: Jan < Feb < Mar < Apr < May < Jun < Jul < Aug < Sep < ... < Dec
uk_data %>%
  mutate(month = month(timestamp, label = T)) %>%
  tail()
timestamp nd month
254587 2019-10-08 21:00:00 31151 Oct
254588 2019-10-08 21:30:00 29294 Oct
254589 2019-10-08 22:00:00 27308 Oct
254590 2019-10-08 22:30:00 25422 Oct
254591 2019-10-08 23:00:00 23487 Oct
254592 2019-10-08 23:30:00 21974 Oct
# we can group_by() the year and month to aggregate to a monthly frequency.
uk_data %>%
  group_by(year(timestamp), month(timestamp, label = T)) %>%
  summarise(nd = sum(nd))
## `summarise()` has grouped output by 'year(timestamp)'. You can override using
## the `.groups` argument.
year(timestamp) month(timestamp, label = T) nd
2005 Apr 55860950
2005 May 53290225
2005 Jun 50384699
2005 Jul 50821971
2005 Aug 50932241
2005 Sep 51640229
2005 Oct 56185094
2005 Nov 62108220
2005 Dec 65248152
2006 Jan 66849497
2006 Feb 60579739
2006 Mar NA
2006 Apr 54663527
2006 May 53186630
2006 Jun 49795953
2006 Jul 51547742
2006 Aug 50713472
2006 Sep 51215300
2006 Oct 55663834
2006 Nov 59846855
2006 Dec 62140504
2007 Jan 63732161
2007 Feb 57337349
2007 Mar NA
2007 Apr 51450615
2007 May 52325269
2007 Jun 49619310
2007 Jul 50586453
2007 Aug 50375549
2007 Sep 50934675
2007 Oct 57333540
2007 Nov 60162398
2007 Dec 63153689
2008 Jan 63890011
2008 Feb 59346543
2008 Mar NA
2008 Apr 55971856
2008 May 51235828
2008 Jun 48708372
2008 Jul 50252201
2008 Aug 49106426
2008 Sep 50424585
2008 Oct 55383466
2008 Nov 57658081
2008 Dec 61310313
2009 Jan 63238386
2009 Feb 56071814
2009 Mar NA
2009 Apr 49312584
2009 May 48428307
2009 Jun 46902613
2009 Jul 47749333
2009 Aug 46796255
2009 Sep 47432398
2009 Oct 52111312
2009 Nov 54089976
2009 Dec 60551151
2010 Jan 64161466
2010 Feb 56806305
2010 Mar NA
2010 Apr 49842384
2010 May 49267519
2010 Jun 46469416
2010 Jul 47404090
2010 Aug 46201165
2010 Sep 47581174
2010 Oct 52365421
2010 Nov 57183714
2010 Dec 65016325
2011 Jan 61790902
2011 Feb 53644420
2011 Mar NA
2011 Apr 47332419
2011 May 47606475
2011 Jun 46423796
2011 Jul 46637732
2011 Aug 46887222
2011 Sep 47023328
2011 Oct 50414498
2011 Nov 53419646
2011 Dec 57166874
2012 Jan 58518689
2012 Feb 56589617
2012 Mar NA
2012 Apr 50344841
2012 May 49806625
2012 Jun 45197770
2012 Jul 46521301
2012 Aug 46153913
2012 Sep 45943095
2012 Oct 52176933
2012 Nov 54836483
2012 Dec 57666386
2013 Jan 60083628
2013 Feb 54298601
2013 Mar NA
2013 Apr 50870393
2013 May 47325312
2013 Jun 43811218
2013 Jul 46156155
2013 Aug 44900214
2013 Sep 46089789
2013 Oct 49737150
2013 Nov 54003270
2013 Dec 54754913
2014 Jan 56909093
2014 Feb 50388456
2014 Mar NA
2014 Apr 46387968
2014 May 45609432
2014 Jun 43056191
2014 Jul 44616054
2014 Aug 42917349
2014 Sep 44808525
2014 Oct 48141164
2014 Nov 51342735
2014 Dec 54481603
2015 Jan 56481442
2015 Feb 51734473
2015 Mar NA
2015 Apr 44697912
2015 May 43387305
2015 Jun 41306952
2015 Jul 42292826
2015 Aug 41883601
2015 Sep 43182545
2015 Oct 48303656
2015 Nov 49263983
2015 Dec 49323073
2016 Jan 53584912
2016 Feb 50024141
2016 Mar NA
2016 Apr 45057811
2016 May 41471080
2016 Jun 40653586
2016 Jul 40197282
2016 Aug 39749783
2016 Sep 40629190
2016 Oct 45825859
2016 Nov 51077791
2016 Dec 51575801
2017 Jan 55101622
2017 Feb 47109847
2017 Mar NA
2017 Apr 40943010
2017 May 41025654
2017 Jun 37999934
2017 Jul 39033334
2017 Aug 38891221
2017 Sep 40287507
2017 Oct 43429259
2017 Nov 48595772
2017 Dec 51724980
2018 Jan 52607241
2018 Feb 48256605
2018 Mar NA
2018 Apr 42945982
2018 May 39424987
2018 Jun 37396587
2018 Jul 39412416
2018 Aug 38957886
2018 Sep 38626177
2018 Oct 43913596
2018 Nov 47387903
2018 Dec 49016945
2019 Jan 52716003
2019 Feb 44499966
2019 Mar NA
2019 Apr 41082917
2019 May 39777740
2019 Jun 36927053
2019 Jul 38304773
2019 Aug 36778929
2019 Sep 37547370
2019 Oct 10905359
# That does what we want, but it also separates the year and month
# into separate columns and converts them non-DateTime objects.
# There’s a better way! The timetk package provides
# a summarize_by_time() function that does exactly what we want.

uk_monthly_data <- uk_data %>%
  summarize_by_time(
    .date_var = timestamp,
    .by = "month",
    nd = sum(nd, na.rm = TRUE)
  )

# As a final step, let’s also restrict our data to fall between 2006
# and 2018. Again, we could do this with filter(year(timestamp) >= 2006 &
# year(timestamp) <= 2018) but then we would run into the same problems as
# with the summarize() function. Instead, we’ll use filter_by_time():

uk_monthly_data %>%
  filter_by_time(
    .date_var = timestamp,
    .start_date = "2006",
    .end_date = "2018"
  )
timestamp nd
2006-01-01 66849497
2006-02-01 60579739
2006-03-01 65731906
2006-04-01 54663527
2006-05-01 53186630
2006-06-01 49795953
2006-07-01 51547742
2006-08-01 50713472
2006-09-01 51215300
2006-10-01 55663834
2006-11-01 59846855
2006-12-01 62140504
2007-01-01 63732161
2007-02-01 57337349
2007-03-01 60420637
2007-04-01 51450615
2007-05-01 52325269
2007-06-01 49619310
2007-07-01 50586453
2007-08-01 50375549
2007-09-01 50934675
2007-10-01 57333540
2007-11-01 60162398
2007-12-01 63153689
2008-01-01 63890011
2008-02-01 59346543
2008-03-01 60473995
2008-04-01 55971856
2008-05-01 51235828
2008-06-01 48708372
2008-07-01 50252201
2008-08-01 49106426
2008-09-01 50424585
2008-10-01 55383466
2008-11-01 57658081
2008-12-01 61310313
2009-01-01 63238386
2009-02-01 56071814
2009-03-01 56518546
2009-04-01 49312584
2009-05-01 48428307
2009-06-01 46902613
2009-07-01 47749333
2009-08-01 46796255
2009-09-01 47432398
2009-10-01 52111312
2009-11-01 54089976
2009-12-01 60551151
2010-01-01 64161466
2010-02-01 56806305
2010-03-01 57716147
2010-04-01 49842384
2010-05-01 49267519
2010-06-01 46469416
2010-07-01 47404090
2010-08-01 46201165
2010-09-01 47581174
2010-10-01 52365421
2010-11-01 57183714
2010-12-01 65016325
2011-01-01 61790902
2011-02-01 53644420
2011-03-01 57306818
2011-04-01 47332419
2011-05-01 47606475
2011-06-01 46423796
2011-07-01 46637732
2011-08-01 46887222
2011-09-01 47023328
2011-10-01 50414498
2011-11-01 53419646
2011-12-01 57166874
2012-01-01 58518689
2012-02-01 56589617
2012-03-01 54141450
2012-04-01 50344841
2012-05-01 49806625
2012-06-01 45197770
2012-07-01 46521301
2012-08-01 46153913
2012-09-01 45943095
2012-10-01 52176933
2012-11-01 54836483
2012-12-01 57666386
2013-01-01 60083628
2013-02-01 54298601
2013-03-01 58907274
2013-04-01 50870393
2013-05-01 47325312
2013-06-01 43811218
2013-07-01 46156155
2013-08-01 44900214
2013-09-01 46089789
2013-10-01 49737150
2013-11-01 54003270
2013-12-01 54754913
2014-01-01 56909093
2014-02-01 50388456
2014-03-01 52260609
2014-04-01 46387968
2014-05-01 45609432
2014-06-01 43056191
2014-07-01 44616054
2014-08-01 42917349
2014-09-01 44808525
2014-10-01 48141164
2014-11-01 51342735
2014-12-01 54481603
2015-01-01 56481442
2015-02-01 51734473
2015-03-01 52456541
2015-04-01 44697912
2015-05-01 43387305
2015-06-01 41306952
2015-07-01 42292826
2015-08-01 41883601
2015-09-01 43182545
2015-10-01 48303656
2015-11-01 49263983
2015-12-01 49323073
2016-01-01 53584912
2016-02-01 50024141
2016-03-01 51083983
2016-04-01 45057811
2016-05-01 41471080
2016-06-01 40653586
2016-07-01 40197282
2016-08-01 39749783
2016-09-01 40629190
2016-10-01 45825859
2016-11-01 51077791
2016-12-01 51575801
2017-01-01 55101622
2017-02-01 47109847
2017-03-01 47652508
2017-04-01 40943010
2017-05-01 41025654
2017-06-01 37999934
2017-07-01 39033334
2017-08-01 38891221
2017-09-01 40287507
2017-10-01 43429259
2017-11-01 48595772
2017-12-01 51724980
2018-01-01 52607241
2018-02-01 48256605
2018-03-01 51572809
2018-04-01 42945982
2018-05-01 39424987
2018-06-01 37396587
2018-07-01 39412416
2018-08-01 38957886
2018-09-01 38626177
2018-10-01 43913596
2018-11-01 47387903
2018-12-01 49016945
head(uk_data)
timestamp nd
2005-04-01 00:00:00 32926
2005-04-01 00:30:00 32154
2005-04-01 01:00:00 33633
2005-04-01 01:30:00 34574
2005-04-01 02:00:00 34720
2005-04-01 02:30:00 34452
glimpse(uk_data)
## Rows: 254,592
## Columns: 2
## $ timestamp <dttm> 2005-04-01 00:00:00, 2005-04-01 00:30:00, 2005-04-01 01:00:…
## $ nd        <int> 32926, 32154, 33633, 34574, 34720, 34452, 33818, 32951, 3244…
ggplot(data = uk_monthly_data, aes(x = timestamp, y = nd)) +
  geom_line() +
  geom_smooth(se = FALSE) +
  labs(x = "Monthly Data", y = "Electricity Demand") +
  scale_x_datetime(date_breaks = "year", date_labels = "%b-%Y") +
  theme(
    axis.text.x = element_text(angle = 90)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

rm(time_lt, date, time)

plot_time_series(uk_monthly_data,
  .date_var = timestamp,
  .value = nd,
  .interactive = TRUE,
  .x_lab = "Monthy Data",
  .y_lab = "Electricity Demand"
)